In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
import folium
import random
from pulp import *
import json
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows",700)
from haversine import haversine
In [2]:
data = pd.read_csv("Boston Crime.csv", encoding = "ISO-8859-1", low_memory=False)
data['OCCURRED_ON_DATE']=pd.to_datetime(data['OCCURRED_ON_DATE'])
In [3]:
data.groupby(["INCIDENT_NUMBER"])["OFFENSE_CODE_GROUP"].count().sort_values(ascending=False).head()
Out[3]:
INCIDENT_NUMBER
I162030584    13
I152080623    11
I172096394    10
I172013170    10
I182065208    10
Name: OFFENSE_CODE_GROUP, dtype: int64
In [4]:
data[data.INCIDENT_NUMBER=="I162030584"].head()
Out[4]:
INCIDENT_NUMBER OFFENSE_CODE OFFENSE_CODE_GROUP OFFENSE_DESCRIPTION DISTRICT REPORTING_AREA SHOOTING OCCURRED_ON_DATE YEAR MONTH DAY_OF_WEEK HOUR UCR_PART STREET Lat Long Location
246401 I162030584 423 Aggravated Assault ASSAULT - AGGRAVATED C11 243 NaN 2016-04-20 11:07:00 2016 4 Wednesday 11 Part One TRESCOTT ST 42.3158 -71.060521 (42.31580044, -71.06052053)
246402 I162030584 802 Simple Assault ASSAULT SIMPLE - BATTERY C11 243 NaN 2016-04-20 11:07:00 2016 4 Wednesday 11 Part Two TRESCOTT ST 42.3158 -71.060521 (42.31580044, -71.06052053)
246403 I162030584 1501 Firearm Violations WEAPON - FIREARM - CARRYING / POSSESSING, ETC C11 243 NaN 2016-04-20 11:07:00 2016 4 Wednesday 11 Part Two TRESCOTT ST 42.3158 -71.060521 (42.31580044, -71.06052053)
246404 I162030584 1504 Other WEAPON - OTHER - OTHER VIOLATION C11 243 NaN 2016-04-20 11:07:00 2016 4 Wednesday 11 Part Two TRESCOTT ST 42.3158 -71.060521 (42.31580044, -71.06052053)
246405 I162030584 1843 Drug Violation DRUGS - POSS CLASS B - INTENT TO MFR DIST DISP C11 243 NaN 2016-04-20 11:07:00 2016 4 Wednesday 11 Part Two TRESCOTT ST 42.3158 -71.060521 (42.31580044, -71.06052053)

Since some crimes(incidents) consist of different offense types,I will create two data with original data and non duplicated data.

In [5]:
data_non_duplicates=data.drop_duplicates(subset="INCIDENT_NUMBER", keep="first").reset_index(drop=True)
In [6]:
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 327820 entries, 0 to 327819
Data columns (total 17 columns):
 #   Column               Non-Null Count   Dtype         
---  ------               --------------   -----         
 0   INCIDENT_NUMBER      327820 non-null  object        
 1   OFFENSE_CODE         327820 non-null  int64         
 2   OFFENSE_CODE_GROUP   327820 non-null  object        
 3   OFFENSE_DESCRIPTION  327820 non-null  object        
 4   DISTRICT             326046 non-null  object        
 5   REPORTING_AREA       327820 non-null  object        
 6   SHOOTING             1055 non-null    object        
 7   OCCURRED_ON_DATE     327820 non-null  datetime64[ns]
 8   YEAR                 327820 non-null  int64         
 9   MONTH                327820 non-null  int64         
 10  DAY_OF_WEEK          327820 non-null  object        
 11  HOUR                 327820 non-null  int64         
 12  UCR_PART             327727 non-null  object        
 13  STREET               316843 non-null  object        
 14  Lat                  307188 non-null  float64       
 15  Long                 307188 non-null  float64       
 16  Location             327820 non-null  object        
dtypes: datetime64[ns](1), float64(2), int64(4), object(10)
memory usage: 42.5+ MB
In [7]:
data.groupby("YEAR")["INCIDENT_NUMBER"].count()
Out[7]:
YEAR
2015     53392
2016     99134
2017    100938
2018     74356
Name: INCIDENT_NUMBER, dtype: int64
In [8]:
df_2015=data[data.YEAR==2015]
print(df_2015["OCCURRED_ON_DATE"].min())
df_2018=data[data.YEAR==2018]
print(df_2018["OCCURRED_ON_DATE"].max())
2015-06-15 00:00:00
2018-10-03 20:49:00
In [9]:
print(dt.datetime.strptime("2016-1-1", '%Y-%m-%d') - dt.datetime.strptime("2015-6-15", '%Y-%m-%d'))
print(dt.datetime.strptime("2018-10-03", '%Y-%m-%d') - dt.datetime.strptime("2018-1-1", '%Y-%m-%d'))
200 days, 0:00:00
275 days, 0:00:00
In [10]:
crime_data=pd.DataFrame(data.groupby("YEAR")["INCIDENT_NUMBER"].count()).rename(columns={"INCIDENT_NUMBER":"CRIME_NUMBER"})
crime_data["Total_Day"]=[200,365,365,275]
crime_data["Daily_Average"]=crime_data["CRIME_NUMBER"]/crime_data["Total_Day"]
crime_data
Out[10]:
CRIME_NUMBER Total_Day Daily_Average
YEAR
2015 53392 200 266.960000
2016 99134 365 271.600000
2017 100938 365 276.542466
2018 74356 275 270.385455
In [11]:
sns.set()
p0=plt.figure(figsize=(7,6))
plt.title(r'2015-2018 Total')
plt.bar(range(crime_data.index.shape[0]),crime_data.loc[:,'CRIME_NUMBER'])
plt.xlabel('CRIME_NUMBERS')
plt.xlabel('Years')
plt.ylabel('Yearly Crime Numbers')
plt.xticks(range(crime_data.index.shape[0]),crime_data.index)
x=np.arange(crime_data.index.shape[0])
y=np.array(crime_data['CRIME_NUMBER'])
for i,j in zip(x,y):
    plt.text(i,j,'%d'%j,ha='center')
plt.show()
In [12]:
sns.set()
p0=plt.figure(figsize=(7,6))
plt.title(r'2015-2018 Average')
plt.bar(range(crime_data.index.shape[0]),crime_data.loc[:,'Daily_Average'])
plt.xlabel('Years')
plt.ylabel('Daily Average of Crimes')
plt.xticks(range(crime_data.index.shape[0]),crime_data.index)
x=np.arange(crime_data.index.shape[0])
y=np.array(crime_data['Daily_Average'])
for i,j in zip(x,y):
    plt.text(i,j,'%d'%j,ha='center')
    
plt.show()
In [13]:
Montly_Crime_Data=pd.DataFrame(data_non_duplicates.groupby(["YEAR","MONTH"])["INCIDENT_NUMBER"].count()).reset_index().rename(
    columns={"INCIDENT_NUMBER":"Crime_Number"})
Montly_Crime_Data.head()
Out[13]:
YEAR MONTH Crime_Number
0 2015 6 3719
1 2015 7 7284
2 2015 8 7320
3 2015 9 7380
4 2015 10 7287
In [14]:
sns.set_theme(style="whitegrid")
sns.set(rc={'figure.figsize':(15,20)})
g = sns.catplot(
    data=Montly_Crime_Data, kind="bar",
    x="MONTH", y="Crime_Number", hue="YEAR",
    ci="sd", palette="dark", alpha=.8, height=7, aspect=14/10
)
g.despine(left=True)
g.set_axis_labels("Months", "Total Crimes")
g.legend.set_title("Years")
In [ ]:
 

Let's first find the most frequently committed crimes

In [15]:
most_freq_crimes=data.groupby("OFFENSE_CODE_GROUP")["INCIDENT_NUMBER"].count().sort_values(ascending=False)[:5].index
freq_crimes=data[data.OFFENSE_CODE_GROUP.isin(most_freq_crimes)].reset_index(drop=True)
freq_crimes=freq_crimes.dropna(subset = ['DISTRICT'])
In [16]:
pd.DataFrame(data.groupby("OFFENSE_CODE_GROUP")["INCIDENT_NUMBER"].count().sort_values(ascending=False)[:10]).reset_index().rename(columns=
                                                                                                                                 {"INCIDENT_NUMBER":"INCIDENT_NUMBER_TOTAL"})
Out[16]:
OFFENSE_CODE_GROUP INCIDENT_NUMBER_TOTAL
0 Motor Vehicle Accident Response 38134
1 Larceny 26670
2 Medical Assistance 24226
3 Investigate Person 19176
4 Other 18612
5 Drug Violation 17037
6 Simple Assault 16263
7 Vandalism 15810
8 Verbal Disputes 13478
9 Towed 11632
In [17]:
pd.DataFrame(freq_crimes.groupby(["DISTRICT"])["INCIDENT_NUMBER"].count()).reset_index()
Out[17]:
DISTRICT INCIDENT_NUMBER
0 A1 13862
1 A15 2777
2 A7 5285
3 B2 19182
4 B3 12276
5 C11 16343
6 C6 9400
7 D14 8389
8 D4 18348
9 E13 6869
10 E18 7398
11 E5 5919
In [18]:
freq_crimes_data=pd.merge(left=pd.DataFrame(freq_crimes.groupby(["OFFENSE_CODE_GROUP","DISTRICT"])["INCIDENT_NUMBER"].count()).reset_index(),
        right=pd.DataFrame(data.groupby("OFFENSE_CODE_GROUP")["INCIDENT_NUMBER"].count().sort_values(ascending=False)[:5]).reset_index().rename(columns={"INCIDENT_NUMBER":"INCIDENT_NUMBER_TOTAL"}),
        left_on="OFFENSE_CODE_GROUP",right_on="OFFENSE_CODE_GROUP",how="left")
freq_crimes_data=pd.merge(left=freq_crimes_data,right=pd.DataFrame(freq_crimes.groupby(["DISTRICT"])["INCIDENT_NUMBER"].count()).reset_index().rename(columns={"INCIDENT_NUMBER":"INCIDENT_NUMBER_DISTRICT"}),
         left_on="DISTRICT",right_on="DISTRICT",how="left")
In [19]:
freq_crimes_data["Offense_Percentage"]=(freq_crimes_data["INCIDENT_NUMBER"]/freq_crimes_data["INCIDENT_NUMBER_TOTAL"])*100
freq_crimes_data["District_Percentage"]=(freq_crimes_data["INCIDENT_NUMBER"]/freq_crimes_data["INCIDENT_NUMBER_DISTRICT"])*100
freq_crimes_data=freq_crimes_data.drop(["INCIDENT_NUMBER_TOTAL","INCIDENT_NUMBER_DISTRICT"],1)
In [20]:
freq_crimes_data=freq_crimes_data.sort_values(["DISTRICT", "INCIDENT_NUMBER"], ascending = (True, False)).reset_index(drop=True)
In [21]:
freq_crimes_data.head(10)
Out[21]:
OFFENSE_CODE_GROUP DISTRICT INCIDENT_NUMBER Offense_Percentage District_Percentage
0 Larceny A1 4849 18.181477 34.980522
1 Motor Vehicle Accident Response A1 2936 7.699166 21.180205
2 Medical Assistance A1 2343 9.671427 16.902323
3 Other A1 2100 11.283043 15.149329
4 Investigate Person A1 1634 8.521068 11.787621
5 Motor Vehicle Accident Response A15 984 2.580374 35.433921
6 Larceny A15 517 1.938508 18.617213
7 Medical Assistance A15 494 2.039132 17.788981
8 Investigate Person A15 476 2.482270 17.140799
9 Other A15 306 1.644101 11.019085
In [22]:
plt.rcParams['axes.labelsize'] = 16
plt.rcParams['axes.titlesize'] = 16
fig = plt.figure(figsize = (8, 5))
 
# creating the bar plot
plt.bar(freq_crimes_data[freq_crimes_data.DISTRICT==freq_crimes_data["DISTRICT"].unique()[6]]["OFFENSE_CODE_GROUP"],
       freq_crimes_data[freq_crimes_data.DISTRICT==freq_crimes_data["DISTRICT"].unique()[6]]["District_Percentage"], color ='maroon',width=.5)
plt.xticks(rotation = 90)
plt.xlabel("Offenses",fontsize=18)
plt.ylabel("Percentage Of Incidents")
plt.title(freq_crimes_data["DISTRICT"].unique()[10])
plt.show()
In [23]:
fig, axes = plt.subplots(figsize=(12, 34) , nrows = 6, ncols = 2)

#five categorical columns and three numerical columns of interest
for i, category in enumerate(freq_crimes_data["DISTRICT"].unique()):   
    ax = fig.add_subplot(6,2,i+1)
    plt.bar(freq_crimes_data[freq_crimes_data.DISTRICT==category]["OFFENSE_CODE_GROUP"],
       freq_crimes_data[freq_crimes_data.DISTRICT==category]["District_Percentage"])
    plt.xticks(rotation = 30)
    plt.title(category)
In [24]:
freq_crimes_data=freq_crimes_data.sort_values(["OFFENSE_CODE_GROUP", "Offense_Percentage"], ascending = (True, False)).reset_index(drop=True)
In [25]:
fig = plt.figure(figsize = (8, 5))
 
# creating the bar plot
plt.bar(freq_crimes_data[freq_crimes_data.OFFENSE_CODE_GROUP==freq_crimes_data["OFFENSE_CODE_GROUP"].unique()[4]]["DISTRICT"],
       freq_crimes_data[freq_crimes_data.OFFENSE_CODE_GROUP==freq_crimes_data["OFFENSE_CODE_GROUP"].unique()[4]]["Offense_Percentage"], color ='olive',width=.6)

plt.xticks(rotation = 0)
plt.xlabel("Districts")
plt.ylabel("Percentage Of Incidents")
plt.title(freq_crimes_data["OFFENSE_CODE_GROUP"].unique()[4])
plt.show()
In [26]:
freq_crimes_data.head(20)
Out[26]:
OFFENSE_CODE_GROUP DISTRICT INCIDENT_NUMBER Offense_Percentage District_Percentage
0 Investigate Person B2 2806 14.632874 14.628297
1 Investigate Person C11 2744 14.309554 16.790063
2 Investigate Person B3 2535 13.219650 20.650049
3 Investigate Person D4 2177 11.352733 11.865053
4 Investigate Person A1 1634 8.521068 11.787621
5 Investigate Person C6 1413 7.368586 15.031915
6 Investigate Person D14 1242 6.476846 14.805102
7 Investigate Person E18 1221 6.367334 16.504461
8 Investigate Person E5 1062 5.538173 17.942220
9 Investigate Person E13 915 4.771589 13.320716
10 Investigate Person A7 877 4.573425 16.594134
11 Investigate Person A15 476 2.482270 17.140799
12 Larceny D4 7554 28.323960 41.170700
13 Larceny A1 4849 18.181477 34.980522
14 Larceny B2 2939 11.019873 15.321656
15 Larceny C11 2226 8.346457 13.620510
16 Larceny C6 1971 7.390326 20.968085
17 Larceny D14 1671 6.265467 19.918941
18 Larceny B3 1240 4.649419 10.101010
19 Larceny E13 1224 4.589426 17.819188

BEST STATİON LOCATİON

In [27]:
location_data=data[["INCIDENT_NUMBER","DISTRICT","YEAR","Lat","Long","Location"]]
location_data.isnull().sum()
Out[27]:
INCIDENT_NUMBER        0
DISTRICT            1774
YEAR                   0
Lat                20632
Long               20632
Location               0
dtype: int64
In [28]:
location_data[location_data.Long.isnull()==True].head()
Out[28]:
INCIDENT_NUMBER DISTRICT YEAR Lat Long Location
150 I182079891 C6 2018 NaN NaN (0.00000000, 0.00000000)
151 I182079891 C6 2018 NaN NaN (0.00000000, 0.00000000)
216 I182079820 C11 2018 NaN NaN (0.00000000, 0.00000000)
220 I182079816 C11 2018 NaN NaN (0.00000000, 0.00000000)
248 I182079784 C11 2018 NaN NaN (0.00000000, 0.00000000)
In [29]:
location_data=location_data.dropna(subset = ['Lat'])
location_data[location_data.DISTRICT.isnull()==True]
Out[29]:
INCIDENT_NUMBER DISTRICT YEAR Lat Long Location
6 I182080048 NaN 2018 42.320734 -71.056764 (42.32073413, -71.05676415)
15 I182080038 NaN 2018 42.315961 -71.090426 (42.31596119, -71.09042564)
26 I182080025 NaN 2018 42.349524 -71.079493 (42.34952402, -71.07949284)
43 I182080007 NaN 2018 -1.000000 -1.000000 (-1.00000000, -1.00000000)
44 I182080006 NaN 2018 42.327847 -71.079387 (42.32784724, -71.07938680)
... ... ... ... ... ... ...
321256 I152056757 NaN 2015 42.257949 -71.161229 (42.25794926, -71.16122880)
323805 I152053820 NaN 2015 -1.000000 -1.000000 (-1.00000000, -1.00000000)
323806 I152053820 NaN 2015 -1.000000 -1.000000 (-1.00000000, -1.00000000)
324041 I152053552 NaN 2015 -1.000000 -1.000000 (-1.00000000, -1.00000000)
326740 I152050441 NaN 2015 42.331780 -71.113285 (42.33178000, -71.11328500)

1480 rows × 6 columns

In [30]:
location_data["DISTRICT"]=location_data["DISTRICT"].fillna("No_Info")
location_data["Location"]=location_data["Location"].apply(lambda x:x.strip("()").strip(" "))
location_data["Location"]=location_data["Location"].apply(lambda x:str(x.split(" ")[0]+x.split(" ")[1]))
location_data=location_data[~location_data.Long.isin([-1])].reset_index(drop=True)
location_data=location_data[location_data.YEAR==2017].reset_index(drop=True)
location_data.head()
Out[30]:
INCIDENT_NUMBER DISTRICT YEAR Lat Long Location
0 I182080026 C6 2017 42.317794 -71.042110 42.31779354,-71.04210959
1 I182079957 A1 2017 42.355633 -71.052646 42.35563262,-71.05264582
2 I182079957 A1 2017 42.355633 -71.052646 42.35563262,-71.05264582
3 I182079578 B2 2017 42.310900 -71.080077 42.31089986,-71.08007661
4 I182076588 C11 2017 42.295271 -71.067758 42.29527071,-71.06775815
In [31]:
randomlist = random.sample(range(0, location_data.index.max()), 2500)
WHS_COORD = [location_data["Lat"][0],location_data["Long"][0]]
map_nyc = folium.Map(location=WHS_COORD, zoom_start=11, width=740, height=500)

test_colors=['purple', 'orange', 'darkred', 'lightred', 'yellow', 
     'darkblue', 'darkgreen', 'white', 'pink', 'lightblue', 'lightgreen', 'gray', 'black',]
my_colors = {location_data.DISTRICT.unique()[i]:test_colors[i] for i in range(len(location_data.DISTRICT.unique()))} 

for i in randomlist:        
    my_color=my_colors[location_data["DISTRICT"][i]]

    folium.CircleMarker([location_data["Lat"][i],location_data["Long"][i]], radius=5,color=my_color, fill_color='#0080bb',popup=location_data["DISTRICT"][i]).add_to(map_nyc)
    
map_nyc
Out[31]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [32]:
A15_2016=data_non_duplicates[(data_non_duplicates["DISTRICT"]=="A15") & (data_non_duplicates["YEAR"]==2016)][["INCIDENT_NUMBER",
                                                                                                              "Lat","Long"]].dropna().reset_index(drop=True)

A15_2016=A15_2016[~A15_2016.Lat.isin([-1])].reset_index(drop=True)

pd.concat([A15_2016.head(),A15_2016.tail()])
Out[32]:
INCIDENT_NUMBER Lat Long
0 I182034896 42.376632 -71.055932
1 I182030597 42.381572 -71.066768
2 I172088472 42.378659 -71.057694
3 I172055445 42.374749 -71.052706
4 I172052015 42.380254 -71.071136
1711 I162000437 42.394213 -71.067413
1712 I162000303 42.374381 -71.065043
1713 I162000222 42.355216 -71.060129
1714 I162000197 42.376737 -71.056675
1715 I162000070 42.376852 -71.068873
In [34]:
Potential_Station_Number=30
Station_ids=["ST"+str(x) for x in range(1,Potential_Station_Number+1)]
randomlist = random.sample(range(0, A15_2016.index.max()), len(Station_ids))
potential_stations=pd.DataFrame({"Station_id":Station_ids,
              "Station_Lat":A15_2016.loc[randomlist,"Lat"].values.tolist(),
             "Station_Long":A15_2016.loc[randomlist,"Long"].values.tolist()})
In [35]:
WHS_COORD = [A15_2016["Lat"][0],A15_2016["Long"][0]]
map_nyc = folium.Map(location=WHS_COORD, zoom_start=13, width=740, height=500)


for i in range(len(A15_2016)):
    
    folium.CircleMarker([A15_2016["Lat"][i],A15_2016["Long"][i]], radius=5,color="red", fill_color='#0080bb',popup=A15_2016["INCIDENT_NUMBER"][i]).add_to(map_nyc)
    
for j in range(len(potential_stations)):
    folium.Marker([potential_stations["Station_Lat"][j],potential_stations["Station_Long"][j]], radius=5,popup=potential_stations["Station_id"][j]).add_to(map_nyc) 
map_nyc
Out[35]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [36]:
N = A15_2016["INCIDENT_NUMBER"].nunique()

print("The dimension : " + str(N)) 
res = [list(range(1 + N * i, 1 + N * (i + 1))) 
                            for i in range(N)]  
res=pd.DataFrame(res)
The dimension : 1716
In [37]:
res=res[:len(potential_stations)]
res.columns=A15_2016["INCIDENT_NUMBER"]
res[res > 0] = 0
res.index=potential_stations["Station_id"].values
In [38]:
potential_coords=[]
for j in range(len(potential_stations)):
    potential_coords.append(str(potential_stations["Station_Lat"][j])+","+str(potential_stations["Station_Long"][j]))

demand_coords=[]
for j in range(len(A15_2016)):
    demand_coords.append(str(A15_2016["Lat"][j])+","+str(A15_2016["Long"][j]))

res["potential_coords"]=potential_coords
demand_coords.append("0")
demand_coords=pd.DataFrame(demand_coords).T
demand_coords.columns=res.columns
res=res.append(demand_coords)
In [39]:
res
Out[39]:
INCIDENT_NUMBER I182034896 I182030597 I172088472 I172055445 I172052015 I172039185 I172036726 I172021770 I172012522 I172010009 I172009753 I172008457 I172008283 I172005509 I172002574 I172000863 I162106373 I162106345 I162106121 I162106017 I162105978 I162105970 I162105963 I162105926 I162105728 I162105693 I162105673 I162105591 I162105588 I162105543 I162105514 I162105506 I162105486 I162105472 I162105408 I162105292 I162105156 I162105104 I162105100 I162104808 I162104553 I162104536 I162104534 I162104450 I162104338 I162104323 I162104277 I162104015 I162103825 I162103759 ... I162002969 I162002927 I162002905 I162002900 I162002894 I162002876 I162002815 I162002797 I162002643 I162002348 I162002326 I162002259 I162002238 I162002223 I162002216 I162001983 I162001977 I162001948 I162001946 I162001945 I162001887 I162001855 I162001731 I162001661 I162001612 I162001539 I162001508 I162001476 I162001471 I162001335 I162001282 I162001261 I162001159 I162001113 I162001112 I162001044 I162000983 I162000876 I162000786 I162000694 I162000636 I162000611 I162000583 I162000528 I162000437 I162000303 I162000222 I162000197 I162000070 potential_coords
ST1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.38415612,-71.07257288
ST2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.37687887,-71.061381
ST3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.37877257,-71.05677893
ST4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.3738145,-71.06576148
ST5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.37869265,-71.0655453
ST6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.37515302,-71.05841364
ST7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.37782671,-71.0695008
ST8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.37530654,-71.05628689
ST9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.376852,-71.06887255
ST10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.38003093,-71.06248252
ST11 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.37994025,-71.06060665
ST12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.32823419,-71.08328981
ST13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.37899255,-71.06407854
ST14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.37663167,-71.05593196
ST15 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.37682129,-71.0513832
ST16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.32823419,-71.08328981
ST17 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.37749764,-71.05031219
ST18 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.37507158,-71.06517497
ST19 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.37663167,-71.05593196
ST20 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.37663167,-71.05593196
ST21 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.35521625,-71.06012863
ST22 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.37150618,-71.06042702
ST23 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.37385595,-71.05168939
ST24 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.38100886,-71.07067342
ST25 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.35521625,-71.06012863
ST26 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.37474921,-71.05270605
ST27 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.37840325,-71.05729035
ST28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.37796835,-71.06415861
ST29 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.37915428,-71.06022431
ST30 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42.3712103,-71.06036858
0 42.37663167,-71.05593196 42.38157231,-71.06676819 42.37865852,-71.05769433 42.37474921,-71.05270605 42.38025398,-71.07113638 42.38008015,-71.06432734 42.3787592,-71.05598086 42.35521625,-71.06012863 42.37765121,-71.06007636 42.37886303,-71.0622162 42.37419663,-71.05972003 42.37673728,-71.05667466 42.37295916,-71.06462811 42.37736388,-71.05791376 42.38321306,-71.07814394 42.37502629,-71.06294744 42.35521625,-71.06012863 42.38227721,-71.07975128 42.38080461,-71.0692612 42.37663167,-71.05593196 42.38378962,-71.07480607 42.37515337,-71.06409893 42.37797555,-71.05690284 42.37678313,-71.05070752 42.37663167,-71.05593196 42.37843402,-71.05644163 42.37905063,-71.06874987 42.32823419,-71.08328981 42.38100886,-71.07067342 42.37824004,-71.0604344 42.38180833,-71.0561542 42.37982544,-71.05931236 42.37932087,-71.0591444 42.37839561,-71.06459617 42.37693992,-71.05524264 42.37863317,-71.06157538 42.37749764,-71.05031219 42.37515302,-71.05841364 42.35521625,-71.06012863 42.37351306,-71.0597463 42.38020475,-71.057414 42.35521625,-71.06012863 42.35521625,-71.06012863 42.35521625,-71.06012863 42.37844511,-71.06634933 42.37490832,-71.06171012 42.376852,-71.06887255 42.3750272,-71.05423542 42.37836895,-71.05871896 42.3751514,-71.0639642 ... 42.37736388,-71.05791376 42.35521625,-71.06012863 42.37829205,-71.05387024 42.35521625,-71.06012863 42.37865852,-71.05769433 42.37847466,-71.06111453 42.37824004,-71.0604344 42.37094,-71.062064 42.38020475,-71.057414 42.37886303,-71.0622162 42.3791072,-71.055384 42.3787592,-71.05598086 42.37643498,-71.05559327 42.37886303,-71.0622162 42.37882713,-71.05894693 42.37304645,-71.06148199 42.37304645,-71.06148199 42.37643498,-71.05559327 42.37643498,-71.05559327 42.37643498,-71.05559327 42.37755188,-71.06524558 42.37210129,-71.06043085 42.38048207,-71.06087725 42.37797555,-71.05690284 42.37362201,-71.05691075 42.25985826,-71.12594231 42.38020475,-71.057414 42.37724912,-71.06673435 42.35521625,-71.06012863 42.38020475,-71.057414 42.37997627,-71.06692857 42.37782671,-71.0695008 42.37663167,-71.05593196 42.38117064,-71.07213904 42.38117064,-71.07213904 42.37746305,-71.06697969 42.35521625,-71.06012863 42.37847466,-71.06111453 42.37808452,-71.05784115 42.37932087,-71.0591444 42.37877257,-71.05677893 42.37868909,-71.0534645 42.3758558,-71.05659842 42.38025398,-71.07113638 42.394213,-71.067413 42.37438134,-71.06504282 42.35521625,-71.06012863 42.37673728,-71.05667466 42.376852,-71.06887255 0

31 rows × 1717 columns

Let's find the distances from potential stations to incedent locations

In [41]:
last_data=pd.DataFrame()
my_list=[]
for j in range(0,(len(res.columns)-1)):
    for i in range(len(res)-1):
        my_length=int(haversine([float(res.loc[res.index[i]]["potential_coords"].split(",")[0]),
              float(res.loc[res.index[i]]["potential_coords"].split(",")[1])],
             [float(res[res.columns[j]][0].split(",")[0]),float(res[res.columns[j]][0].split(",")[1])])*1000)
        my_list.append(my_length)  
    last_data[res.columns[j]]=my_list
    my_list=[]
last_data.index=potential_stations["Station_id"].values
In [42]:
last_data.head()
Out[42]:
I182034896 I182030597 I172088472 I172055445 I172052015 I172039185 I172036726 I172021770 I172012522 I172010009 I172009753 I172008457 I172008283 I172005509 I172002574 I172000863 I162106373 I162106345 I162106121 I162106017 I162105978 I162105970 I162105963 I162105926 I162105728 I162105693 I162105673 I162105591 I162105588 I162105543 I162105514 I162105506 I162105486 I162105472 I162105408 I162105292 I162105156 I162105104 I162105100 I162104808 I162104553 I162104536 I162104534 I162104450 I162104338 I162104323 I162104277 I162104015 I162103825 I162103759 ... I162003133 I162002969 I162002927 I162002905 I162002900 I162002894 I162002876 I162002815 I162002797 I162002643 I162002348 I162002326 I162002259 I162002238 I162002223 I162002216 I162001983 I162001977 I162001948 I162001946 I162001945 I162001887 I162001855 I162001731 I162001661 I162001612 I162001539 I162001508 I162001476 I162001471 I162001335 I162001282 I162001261 I162001159 I162001113 I162001112 I162001044 I162000983 I162000876 I162000786 I162000694 I162000636 I162000611 I162000583 I162000528 I162000437 I162000303 I162000222 I162000197 I162000070
ST1 1602 556 1366 1938 449 814 1489 3376 1255 1034 1530 1544 1405 1421 469 1286 3376 625 461 1602 187 1219 1459 1974 1602 1469 648 6280 383 1194 1373 1190 1227 916 1634 1092 1972 1534 3376 1584 1320 3376 3376 3376 815 1361 867 1816 1307 1225 ... 3376 1421 3376 1668 3376 1366 1133 1194 1704 1320 1034 1519 1489 1637 1034 1266 1534 1534 1637 1637 1637 949 1670 1043 1459 1739 14501 1320 905 3376 1320 656 747 1602 333 333 874 3376 1133 1385 1227 1428 1683 1604 449 1195 1250 3376 1544 867
ST2 448 684 361 750 884 430 490 2410 137 231 327 386 510 289 1546 242 2410 1623 780 448 1344 294 387 876 448 441 651 5700 890 170 696 369 327 313 504 195 911 310 2410 397 492 2410 2410 2410 443 220 615 622 274 286 ... 2410 289 2410 636 2410 361 178 170 662 492 231 551 490 477 231 294 426 426 477 477 477 326 536 402 387 515 14053 492 441 2410 492 571 675 448 1004 1004 464 2410 178 320 327 432 680 408 884 1990 409 2410 386 615
ST3 248 877 76 558 1190 636 65 2633 298 446 563 226 913 182 1823 655 2633 1926 1049 248 1582 723 89 545 248 46 983 6027 1168 306 341 238 203 643 239 394 549 424 2633 633 167 2633 2633 2633 786 590 1016 465 165 714 ... 2633 182 2633 244 2633 76 357 306 973 167 446 120 65 277 446 178 744 744 277 277 277 708 800 386 89 572 14393 167 835 2633 167 844 1050 248 1289 1289 850 2633 357 116 203 0 272 324 1190 1926 836 2633 226 1016
ST4 866 866 853 1077 841 706 973 2119 632 632 498 814 133 755 1458 267 2119 1485 828 866 1335 202 862 1279 866 921 631 5269 895 658 1188 852 818 518 931 636 1333 621 2119 495 987 2119 2119 2119 517 354 423 956 768 209 ... 2119 755 2119 1096 2119 853 643 658 440 987 632 1035 973 884 632 789 361 361 884 884 884 417 477 842 862 727 13603 987 390 2119 987 691 541 866 971 971 417 2119 643 805 818 921 1146 786 841 2272 86 2119 814 423
ST5 822 335 644 1142 490 183 785 2648 463 274 692 760 641 644 1150 460 2648 1233 385 822 948 411 714 1237 822 748 266 5797 493 422 845 527 530 84 868 326 1258 705 2648 747 688 2648 2648 2648 71 525 341 1014 561 414 ... 2648 644 2648 960 2648 644 364 422 908 688 274 835 785 855 274 542 711 711 855 855 855 129 844 431 714 906 14116 688 187 2648 688 182 338 822 607 607 180 2648 364 636 530 720 992 799 490 1732 481 2648 760 341

5 rows × 1716 columns

In [43]:
optimization_result=pd.DataFrame({"Station":[],"Total_Cost":[]})
In [44]:
for station in range(len(potential_stations)):
    potentials=potential_stations["Station_id"].values.tolist()[station:station+1]
    demands=A15_2016["INCIDENT_NUMBER"].values.tolist()
    distance=last_data[station:station+1].to_dict('index')
    prob = LpProblem("Transportation", LpMinimize)
    routes  =[(i,j) for i in potentials for j in demands]
    amount_vars = LpVariable.dicts("X",(last_data[station:station+1].index.tolist(),demands),lowBound=0, upBound=1, cat='Binary')
    prob += lpSum(amount_vars[i][j]*distance[i][j] for (i,j) in routes)
    for j in demands:
        prob += lpSum(amount_vars[i][j] for i in last_data[station:station+1].index.tolist()) == 1

    prob.solve()
    optimization_result=optimization_result.append(pd.DataFrame({"Station":potentials,"Total_Cost":value(prob.objective)}))
optimization_result=optimization_result.reset_index(drop=True)
In [46]:
optimization_result.head(10)
Out[46]:
Station Total_Cost
0 ST1 2443726.0
1 ST2 1364470.0
2 ST3 1532383.0
3 ST4 1636212.0
4 ST5 1507537.0
5 ST6 1484619.0
6 ST7 1770669.0
7 ST8 1585033.0
8 ST9 1716365.0
9 ST10 1488685.0
In [45]:
optimization_result[optimization_result["Total_Cost"]==optimization_result["Total_Cost"].min()]
Out[45]:
Station Total_Cost
1 ST2 1364470.0
In [47]:
best_station_index=optimization_result[optimization_result["Total_Cost"]==optimization_result["Total_Cost"].min()].index[0]

Best Location for Just One Station

In [48]:
WHS_COORD = [A15_2016["Lat"][0],A15_2016["Long"][0]]
map_nyc = folium.Map(location=WHS_COORD, zoom_start=13, width=740, height=500)


for i in range(len(A15_2016)):
    
    folium.CircleMarker([A15_2016["Lat"][i],A15_2016["Long"][i]], radius=5,color="red", fill_color='#0080bb',popup=A15_2016["INCIDENT_NUMBER"][i]).add_to(map_nyc)
    

folium.Marker([potential_stations["Station_Lat"][best_station_index],potential_stations["Station_Long"][best_station_index]], radius=5,popup=potential_stations["Station_id"][best_station_index]).add_to(map_nyc) 
map_nyc
Out[48]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [49]:
from itertools import combinations 
def rSubset(arr, r): 
    return list(combinations(arr, r)) 

Let's find the best 3 station locations to be opened at the same time

I reduced the number of candidate stations to 15 due to the combination situation.
In [56]:
potential_stations=potential_stations[:15]
In [67]:
potential_stations=potential_stations[~potential_stations["Station_id"].isin(["ST12"])].reset_index()
In [68]:
total_station_number=3
optimization_result_3_stations=pd.DataFrame({"Station":[],"Total_Cost":[]})
In [69]:
for station in rSubset(potential_stations["Station_id"], total_station_number):
    potentials=potential_stations[potential_stations["Station_id"].isin(station)]["Station_id"].values.tolist()
    demands=A15_2016["INCIDENT_NUMBER"].values.tolist()
    distance=last_data.loc[potentials,:].to_dict('index')
    prob = LpProblem("Transportation", LpMinimize)
    routes =[(i,j) for i in potentials for j in demands]
    amount_vars = LpVariable.dicts("X",(potentials,demands),lowBound=0, upBound=1, cat='Binary')
    prob += lpSum(amount_vars[i][j]*distance[i][j] for (i,j) in routes)
    for j in demands:
        prob += lpSum(amount_vars[i][j] for i in potentials) == 1

    prob.solve()
    optimization_result_3_stations=optimization_result_3_stations.append(pd.DataFrame({"Station":station,"Total_Cost":value(prob.objective)}))
optimization_result_3_stations=optimization_result_3_stations.reset_index(drop=True)
In [70]:
optimization_result_3_stations[optimization_result_3_stations["Total_Cost"]==optimization_result_3_stations["Total_Cost"].min()]["Station"].values.tolist()
Out[70]:
['ST1', 'ST3', 'ST4']
In [71]:
WHS_COORD = [A15_2016["Lat"][0],A15_2016["Long"][0]]
map_nyc = folium.Map(location=WHS_COORD, zoom_start=13, width=740, height=500)


for i in range(len(A15_2016)):
    
    folium.CircleMarker([A15_2016["Lat"][i],A15_2016["Long"][i]], radius=5,color="red", fill_color='#0080bb',popup=A15_2016["INCIDENT_NUMBER"][i]).add_to(map_nyc)
    

folium.Marker([potential_stations["Station_Lat"][0],potential_stations["Station_Long"][0]], radius=5,popup=potential_stations["Station_id"][0]).add_to(map_nyc) 
folium.Marker([potential_stations["Station_Lat"][2],potential_stations["Station_Long"][2]], radius=5,popup=potential_stations["Station_id"][2]).add_to(map_nyc) 
folium.Marker([potential_stations["Station_Lat"][3],potential_stations["Station_Long"][3]], radius=5,popup=potential_stations["Station_id"][3]).add_to(map_nyc) 

map_nyc
Out[71]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]:
 
In [ ]:
 
In [72]:
year_2016=data_non_duplicates[data_non_duplicates.YEAR==2016].reset_index(drop=True)
In [73]:
year_2016["DAY"]=year_2016["OCCURRED_ON_DATE"].astype(str).apply(lambda x:x.split(" ")[0])
year_2016["HOUR"]=year_2016["OCCURRED_ON_DATE"].astype(str).apply(lambda x:x.split(" ")[1].split(":")[0])

Histogram of Daily Incident Numbers

In [74]:
sns.displot(year_2016.groupby(["DAY"])["INCIDENT_NUMBER"].count().values.tolist())
plt.title("Histogram of Daily Incident Numbers")
plt.show()
In [75]:
plt.figure(figsize=(8,6))
plt.title(r'Hourly Incidents')
sns.barplot(x="HOUR", y="INCIDENT_NUMBER", data=pd.DataFrame(year_2016.groupby(["HOUR"])["INCIDENT_NUMBER"].count()).reset_index())
plt.show()
In [76]:
year_2016_A1=year_2016[year_2016.DISTRICT=="A1"].reset_index(drop=True)
sns.displot(year_2016_A1.groupby(["DAY"])["INCIDENT_NUMBER"].count().values.tolist())
plt.title("Histogram of Daily Incident Numbers for A1 Region")
plt.show()
In [77]:
print("Mean of Daily Unique Incidents for A1 = {}".format(np.mean(year_2016_A1.groupby(["DAY"])["INCIDENT_NUMBER"].count().values.tolist())))
print("Mean of Daily Unique Incidents = {}".format(np.mean(year_2016.groupby(["DAY"])["INCIDENT_NUMBER"].count().values.tolist())))
Mean of Daily Unique Incidents for A1 = 25.704918032786885
Mean of Daily Unique Incidents = 240.2377049180328
In [ ]: